pacman::p_load(tidyverse, readr, psych, st, stars, tmap, sf,
ggstatsplot, plotly, ggplot2, ggdist, dplyr, ggiraph)
Exploratory Data Analysis - Temperature
1 Import Packages
2 Load the prepared files
Let’s load the RDS file produced during data preparation.
# Load the prepared temperature records and print summary statistics for
# every column.
temperature <- readRDS(file = "data/temperature.rds")
psych::describe(temperature)
vars n mean sd median trimmed mad min max range skew
Station* 1 3715 10.00 5.37 10.0 10.12 7.41 1.0 18.0 17.0 -0.12
Region* 2 3715 3.33 1.41 3.0 3.41 1.48 1.0 5.0 4.0 -0.24
Year 3 3715 2010.64 10.07 2013.0 2011.83 8.90 1982.0 2023.0 41.0 -0.96
Month* 4 3715 6.53 3.45 7.0 6.54 4.45 1.0 12.0 11.0 -0.01
Date 5 3715 NaN NA NA NaN NA Inf -Inf -Inf NA
MeanTemp 6 3715 27.70 0.85 27.7 27.70 0.89 24.9 30.0 5.1 -0.05
MaxTemp 7 3715 33.85 0.98 33.9 33.86 0.89 30.4 37.9 7.5 -0.11
MinTemp 8 3715 22.62 0.94 22.6 22.63 0.74 0.0 26.2 26.2 -7.42
kurtosis se
Station* -1.39 0.09
Region* -1.30 0.02
Year -0.02 0.17
Month* -1.21 0.06
Date NA NA
MeanTemp -0.40 0.01
MaxTemp 0.28 0.02
MinTemp 178.72 0.02
3 Map of Singapore
# Read the MPSZ-2019 subzone boundary layer and reproject it to EPSG:3414.
# The source layer is in WGS 84 (see the reader output below); after the
# transform the geometry is in metres.
mpsz <- st_transform(
  st_read(dsn = "data/geospatial", layer = "MPSZ-2019"),
  crs = 3414
)
Reading layer `MPSZ-2019' from data source
`C:\Vanessa\SMU\Term 4 - Visual Analytics & Applications\mvheng\Group11_VAP\EDA\data\geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS: WGS 84
# Preview the column names, types and first values of the subzone layer.
glimpse(mpsz)
Rows: 332
Columns: 7
$ SUBZONE_N <chr> "MARINA EAST", "INSTITUTION HILL", "ROBERTSON QUAY", "JURON…
$ SUBZONE_C <chr> "MESZ01", "RVSZ05", "SRSZ01", "WISZ01", "MUSZ02", "MPSZ05",…
$ PLN_AREA_N <chr> "MARINA EAST", "RIVER VALLEY", "SINGAPORE RIVER", "WESTERN …
$ PLN_AREA_C <chr> "ME", "RV", "SR", "WI", "MU", "MP", "WI", "WI", "SI", "SI",…
$ REGION_N <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRAL REGION", "WEST…
$ REGION_C <chr> "CR", "CR", "CR", "WR", "CR", "CR", "WR", "WR", "CR", "CR",…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((33222.98 29..., MULTIPOLYGON (…
Let’s take a look at the planning areas for the 5 regions.
# Use tmap's interactive "view" mode for the maps that follow.
tmap_mode("view")

# tmap_options() sets a global option and is not a map layer, so it is called
# on its own rather than being added to the map with `+`.
tmap_options(check.and.fix = TRUE)

# Colour every subzone polygon by the region it belongs to.
tm_shape(mpsz) +
  tm_polygons(col = "REGION_N", palette = "Set2") +
  tm_layout(
    main.title = "Planning Area",
    main.title.position = "left",
    main.title.size = 1,
    legend.show = FALSE,
    frame = FALSE
  ) +
  tm_view(set.zoom.limits = c(11, 12))
4 Temperature analysis
4.1 Analyse temperature using maps
Let’s map each weather station to the planning area (PA) it is located in.
Show the code
# Lookup from weather station name (vector names) to planning area name
# (vector values). The values are later used as join keys against
# mpsz$PLN_AREA_N.
# NOTE(review): a value that does not occur in PLN_AREA_N simply finds no
# polygon in that join -- confirm every value (e.g. "PASIR PANJANG") is an
# actual planning-area name in the MPSZ-2019 layer.
station_to_PA <- local({
  # One row per station: station name, planning area.
  pairs <- matrix(
    c(
      "Admiralty",             "WOODLANDS",
      "Ang Mo Kio",            "ANG MO KIO",
      "Boon Lay (East)",       "BOON LAY",
      "Changi",                "CHANGI",
      "Choa Chu Kang (South)", "CHOA CHU KANG",
      "Clementi",              "CLEMENTI",
      "East Coast Parkway",    "BEDOK",
      "Jurong (West)",         "JURONG WEST",
      "Khatib",                "YISHUN",
      "Marina Barrage",        "DOWNTOWN CORE",
      "Newton",                "NEWTON",
      "Pasir Panjang",         "PASIR PANJANG",
      "Paya Lebar",            "PAYA LEBAR",
      "Seletar",               "SELETAR",
      "Sembawang",             "SEMBAWANG",
      "Tai Seng",              "HOUGANG",
      "Tengah",                "TENGAH",
      "Tuas South",            "TUAS"
    ),
    ncol = 2,
    byrow = TRUE
  )
  setNames(pairs[, 2], pairs[, 1])
})
# Look up each record's planning area by station name. as.character() keeps
# the lookup name-based even if Station is stored as a factor (a factor index
# selects by integer code), and unname() keeps the station names from being
# carried along as a names attribute on the new column.
temperature$PA <- unname(station_to_PA[as.character(temperature$Station)])

# Report stations with no entry in the lookup instead of leaving silent NAs.
if (anyNA(temperature$PA)) {
  warning(
    "No planning area mapped for station(s): ",
    toString(unique(temperature$Station[is.na(temperature$PA)])),
    call. = FALSE
  )
}

# Move PA to the first column; the remaining columns keep their order.
temperature <- temperature[, c("PA", setdiff(names(temperature), "PA"))]
head(temperature)
# A tibble: 6 × 9
PA Station Region Year Month Date MeanTemp MaxTemp MinTemp
<chr> <chr> <chr> <dbl> <ord> <date> <dbl> <dbl> <dbl>
1 WOODLANDS Admiralty North 2009 Jan 2009-01-01 26.3 31.9 23.3
2 WOODLANDS Admiralty North 2009 Feb 2009-02-01 26.8 33.4 23
3 WOODLANDS Admiralty North 2009 Mar 2009-03-01 26.9 34.5 22.2
4 WOODLANDS Admiralty North 2009 Apr 2009-04-01 28.1 35.1 23.7
5 WOODLANDS Admiralty North 2009 May 2009-05-01 28.5 34.7 21.8
6 WOODLANDS Admiralty North 2009 Jun 2009-06-01 28.9 34.7 23.7
# Collapse the monthly records to one row per planning area, station and year.
temp_map <- temperature %>%
  group_by(PA, Station, Year) %>%
  summarise(
    Annual_Mean_Temperature = mean(MeanTemp, na.rm = TRUE),
    Annual_Maximum_Temperature = max(MaxTemp, na.rm = TRUE),
    # The describe() summary of MinTemp reports a minimum of 0.0 against a
    # mean of 22.62 (sd 0.94, kurtosis 178.72), so a 0 is treated as a
    # missing-value placeholder rather than a real reading; otherwise it
    # would become the annual minimum for that station-year.
    Annual_Minimum_Temperature = min(na_if(MinTemp, 0), na.rm = TRUE),
    # Return an ungrouped tibble directly.
    .groups = "drop"
  )
glimpse(temp_map)
Rows: 323
Columns: 6
$ PA <chr> "ANG MO KIO", "ANG MO KIO", "ANG MO KIO", "…
$ Station <chr> "Ang Mo Kio", "Ang Mo Kio", "Ang Mo Kio", "…
$ Year <dbl> 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2…
$ Annual_Mean_Temperature <dbl> 27.40000, 27.71667, 27.31667, 27.50833, 27.…
$ Annual_Maximum_Temperature <dbl> 34.5, 36.0, 35.4, 34.8, 35.6, 35.0, 34.9, 3…
$ Annual_Minimum_Temperature <dbl> 21.8, 21.7, 21.5, 21.8, 20.0, 21.8, 20.3, 2…
# Attach the station-year summaries to the subzone polygons, matching the
# subzone's planning-area name to PA. Every subzone is kept; subzones in a
# planning area without a station get NA in the temperature columns. A
# subzone whose planning area has a station is repeated once per matching
# temp_map row, which is why 332 subzones become 2,357 rows below.
mpsztemp <- mpsz %>%
  left_join(temp_map, by = c("PLN_AREA_N" = "PA"))
glimpse(mpsztemp)
Rows: 2,357
Columns: 12
$ SUBZONE_N <chr> "MARINA EAST", "INSTITUTION HILL", "ROBERTS…
$ SUBZONE_C <chr> "MESZ01", "RVSZ05", "SRSZ01", "WISZ01", "MU…
$ PLN_AREA_N <chr> "MARINA EAST", "RIVER VALLEY", "SINGAPORE R…
$ PLN_AREA_C <chr> "ME", "RV", "SR", "WI", "MU", "MP", "WI", "…
$ REGION_N <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRA…
$ REGION_C <chr> "CR", "CR", "CR", "WR", "CR", "CR", "WR", "…
$ Station <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ Year <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ Annual_Mean_Temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ Annual_Maximum_Temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ Annual_Minimum_Temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((33222.98 29...…
Let’s plot the annual mean temperature distribution across Singapore.
# Map of annual mean temperature, shaded with the "Blues" palette and classed
# with Jenks breaks.
# NOTE(review): mpsztemp has one row per subzone per year, so each polygon
# with station data is drawn once per year here -- confirm whether a single
# year (or an average over years) is what this map is meant to show.
tm_shape(mpsztemp) +
  tm_polygons(
    col = "Annual_Mean_Temperature",
    palette = "Blues",
    style = "jenks"
  ) +
  tm_view(set.zoom.limits = c(11, 12))
Note
It seems like the northern area of Singapore has a cooler mean temperature.
Let’s compare the maximum and minimum temperatures.
# The two maps below were fused into one expression
# (`tm_view(...)tm_shape(...)`), which does not parse; they are separate
# statements.
# NOTE(review): mpsztemp has one row per subzone per year, so each polygon
# with station data is drawn once per year in both maps -- confirm whether a
# single year (or a summary over years) is intended.

# Map of annual maximum temperature.
tm_shape(mpsztemp) +
  tm_polygons(
    col = "Annual_Maximum_Temperature",
    palette = "Blues",
    style = "jenks"
  ) +
  tm_view(set.zoom.limits = c(11, 12))

# Map of annual minimum temperature.
tm_shape(mpsztemp) +
  tm_polygons(
    col = "Annual_Minimum_Temperature",
    palette = "Blues",
    style = "jenks"
  ) +
  tm_view(set.zoom.limits = c(11, 12))